library(ggplot2)
library(dplyr)
library(xtable)
library(marginaleffects)
library(stargazer)
source("functions.R")

##Load and pool Study 1 and Study 2 data

#study 1 data: evidenced (manifest) treated groups only, plus control
d_study1 <- read.csv("surveydata_clean_study1.csv")
d_study1 <- d_study1[d_study1$disc_treatment.7 %in% c("control", "evidenced_stat", "evidenced_epis"),]
d_study1 <- d_study1[!d_study1$nonwest,]

#study 2 data: no-rhetoric treated groups only, plus control
d_study2 <- read.csv("surveydata_clean_study2.csv")
d_study2 <- d_study2[!is.na(d_study2$disc_treatment),] #delete lines with missing wave 2
d_study2$immi <- ifelse(d_study2$immi_notna > 1, d_study2$immi, NA) #treat immi as NA for people with only one response
d_study2 <- d_study2[!d_study2$rhetoric & !is.na(d_study2$disc_treatment),]
d_study2$treated <- d_study2$disc_treatment != "control"

#equalize variable names and levels
colnames(d_study1) <- gsub(".7", "", colnames(d_study1))
d_study1$treated <- d_study1$disc_treatment != "control"
d_study1$disc_treatment <- recode(d_study1$disc_treatment,
                                  "evidenced_stat"="thematic",
                                  "evidenced_epis"="episodic")
d_study1$disc_treatment <- factor(d_study1$disc_treatment, levels=c("control", "thematic", "episodic"))
d_study2$disc_treatment <- factor(d_study2$disc_treatment, levels=c("control", "thematic", "episodic"))

#join together
d_study1$study <- "1"
d_study2$study <- "2"
commoncols <- c("disc_problem", "disc_widespr", "disc_treatment", "treated", "immi", "ideology", "study")
d_pool <- rbind(d_study2[commoncols], d_study1[commoncols])

#imputing immigration attitudes
d_pool$immiNA <- is.na(d_pool$immi)
d_pool$immi_impute <- d_pool$immi
d_pool$immi_impute[d_pool$immiNA] <- mean(d_pool$immi, na.rm=T)
#after pooling, because the dummy immiNA approach assumes all got the same imputed value 


#pooled effects of both evidenced treatments

#on concern outcome (seeing discr. as a problem)
fitfigure_evidpool <- lm(disc_problem ~ disc_treatment + immi_impute + immiNA, data=d_pool)

#on widespread outcome
fitfigure_evidpool_widespr <- lm(disc_widespr ~ disc_treatment + immi_impute + immiNA, data=d_pool)


##main text Figure 2, right hand panels
plot_estCIs(fitfigure_evidpool, "evidenced_only/discr_evidenced_effects_pool", "seeing discrimination as problem",
            "disc_treatment", c("thematic", "episodic"),  treatment_labels=c("audit\nstudy", "supermarket\nstory"),
            title="Pooled", ymax=.51, w=3.5)
plot_estCIs(fitfigure_evidpool_widespr, "evidenced_only/discr_evidenced_effects_widespr_pool", "seeing discrimination as widespread",
            "disc_treatment", c("thematic", "episodic"),  treatment_labels=c("audit\nstudy", "supermarket\nstory"),
            title="Pooled", ymax=1, w=3.5)


##additional results mentioned in-text

#pooled comparison of statistical v episodic manifest treatments
#concern outcome
summary(lm(disc_problem ~ disc_treatment + immi_impute + immiNA, data=d_pool[d_pool$disc_treatment != "control",]))
#widespread outcome
summary(lm(disc_widespr ~ disc_treatment + immi_impute + immiNA, data=d_pool[d_pool$disc_treatment != "control",]))

#unexpected difference between study 1 v. 2 in effect of statistical
#manifest (evidenced) treatment
#concern outcome, not significant
summary(lm(disc_problem ~ disc_treatment*study + immi_impute + immiNA,
           data=d_pool[d_pool$disc_treatment != "episodic",]))
#widespread outcome, marginally significant
summary(lm(disc_widespr ~ disc_treatment*study + immi_impute + immiNA,
           data=d_pool[d_pool$disc_treatment != "episodic",]))


##appendix E: full results behind figures

stargazer(list(fitfigure_evidpool, fitfigure_evidpool_widespr),
          covariate.labels=c("Audit study", "Supermarket story", "Immigration Att.", "Imm. Att. Missing"),
          dep.var.labels=c("Problem","Widespread"),
          omit.stat=c("f","ser"), digits=2, out="tables/fullfig34_pooled.tex",
          label="tab:fullfig34_pool",
          title="Effect of the manifest-evidence treatments in Study 1 and Study 2
          (pooled) on seeing discrimination as a problem (1-5 scale) and as widespread (0-10 scale)")


##appendix F: full results behind figures without imputation

fitfigure_evidpool_noimp <- lm(disc_problem ~ disc_treatment + immi, data=d_pool)
fitfigure_evidpool_widespr_noimp <- lm(disc_widespr ~ disc_treatment + immi, data=d_pool)
stargazer(list(fitfigure_evidpool_noimp, fitfigure_evidpool_widespr_noimp),
          covariate.labels=c("Audit study", "Supermarket story", "Immigration Att."),
          dep.var.labels=c("Problem","Widespread"),
          omit.stat=c("f","ser"), digits=2, out="tables/fullfig34_pooled_noimp.tex",
          label="tab:fullfig34_pool_noimp",
          title="Effect of the manifest-evidence treatments in Study 1 and Study 2
          (pooled) on seeing discrimination as a problem (1-5 scale) and as widespread (0-10 scale). No imputation of
          missing immigration attitudes.")


##appendix H: Heterogeneous treatment effects with evidenced only, pooled, episodic and thematic v. control

outcomes <- c("disc_problem","disc_widespr")
moderators <- c("ideology", "immi")
combos <- apply(expand.grid(c("Audit","Supermarket"), c("Ideology", "Immig.")), 1, paste, collapse=", ")
ints <- lapply(outcomes, function(outcome){
  ints_permod <- lapply(moderators, function(moder){
    
    #get regression outputs
    formula <- as.formula(paste0(outcome, "~  disc_treatment*", moder))
    fit <- lm(formula, data=d_pool)
    int <- tail(summary(fit)$coefficients[,c(1,2,4)], 2)
    
    #add n
    n <- nobs(fit)
    int <- cbind(int, n)
    
    #add MDE
    MDE <- int[,2]*2.8/sd(d_pool[,outcome], na.rm=T)
    int <- cbind(int, MDE)
    
    #get CATEs at the 1st and 3rd quartile
    qs <- quantile(d_pool[moder],  probs = c(.25, .75), na.rm=T)
    CATEs <- avg_slopes(fit, variables = "disc_treatment", by = moder)
    #print(CATEs)
    CATEs_at_qs <- sapply(qs, function(q){
      CATEs[CATEs[,moder]==q, "estimate"]
    })
    CATEs_at_qs <- CATEs_at_qs[c(2,1),]
    #flip order because otherwise, episode v. control is reported first
    
    #add CATEs
    int <- cbind(int, CATEs_at_qs)
    
    #shorten/adjust column names
    colnames(int) <- c("Est.", "SE", "p", "n", "MDE", "CATEq1", "CATEq3")
    
    #return results
    int
    
  })
  ints_permod <- do.call(rbind, ints_permod)
  rownames(ints_permod) <- combos
  ints_permod
})

#add Benjamini-Hochberg adjusted p-values
pvalues <- c(ints[[1]][,3], ints[[2]][,3])
fdrs <- p.adjust(pvalues, method="BH")
ints[[1]] <- cbind(ints[[1]][,1:3], "p, BH"=fdrs[1:4], ints[[1]][,4:7])
ints[[2]] <- cbind(ints[[2]][,1:3], "p, BH"=fdrs[5:8], ints[[2]][,4:7])

#tables for Appendix
print(xtable(ints[[1]], digits=c(0,3,3,3,3,0,3,3,3)), file="tables/hetero_pooled_concern.tex", only.contents = TRUE)
print(xtable(ints[[2]], digits=c(0,3,3,3,3,0,3,3,3)), file="tables/hetero_pooled_widespr.tex", only.contents = TRUE)


